Using LODES origin destination data to map where commuters are heading in the Philadelphia metro area.

Pull in tracts and generate centroids

tracts1 <- tigris::tracts(state="PA", county = c("101", "045","017","029",
                                               "091"), year = 2019) %>% sf::st_as_sf() 
tracts2 <- tigris::tracts(state="NJ", county = c("007","005","015","033"), year = 2019) %>% sf::st_as_sf() 
tracts3 <- tigris::tracts(state="DE", county = c("003"), year = 2019) %>% sf::st_as_sf() 
tracts4 <- tigris::tracts(state="MD", county = c("015"), year = 2019) %>% sf::st_as_sf() 


tracts1.centroids <- tracts1 %>%
  st_centroid()
## Warning in st_centroid.sf(.): st_centroid assumes attributes are constant over
## geometries of x
tracts2.centroids <- tracts2 %>%
  st_centroid()
## Warning in st_centroid.sf(.): st_centroid assumes attributes are constant over
## geometries of x
tracts3.centroids <- tracts3 %>%
  st_centroid()
## Warning in st_centroid.sf(.): st_centroid assumes attributes are constant over
## geometries of x
tracts4.centroids <- tracts4 %>%
  st_centroid()
## Warning in st_centroid.sf(.): st_centroid assumes attributes are constant over
## geometries of x
centroids <- rbind(tracts1.centroids, tracts2.centroids, tracts3.centroids, tracts4.centroids) %>% st_transform(4326)
tracts <- rbind(tracts1, tracts2, tracts3, tracts4)

#list of GEOIDS for tracts in the MSA
centroid.geoids <- c(centroids$GEOID)

Pull LODES data for the four states. Must pull in data for commuters inside each state (od_main), and people who commute in from other states (od_aux). Once we have these, aggergrate blocks to tracts, and filter for origin and destinations that are inside of the MSA. Combine into ‘master’ object that contains every combination of origin destination commuter patterns in the entire MSA.

in.pa <- read_csv("https://lehd.ces.census.gov/data/lodes/LODES7/pa/od/pa_od_main_JT03_2019.csv.gz")
into.pa <- read_csv("https://lehd.ces.census.gov/data/lodes/LODES7/pa/od/pa_od_aux_JT03_2019.csv.gz")

in.nj <- read_csv("https://lehd.ces.census.gov/data/lodes/LODES7/nj/od/nj_od_main_JT03_2019.csv.gz")
into.nj <- read_csv("https://lehd.ces.census.gov/data/lodes/LODES7/nj/od/nj_od_aux_JT03_2019.csv.gz")

in.de <- read_csv("https://lehd.ces.census.gov/data/lodes/LODES7/de/od/de_od_main_JT03_2019.csv.gz")
into.de <- read_csv("https://lehd.ces.census.gov/data/lodes/LODES7/de/od/de_od_aux_JT03_2019.csv.gz")


in.md <- read_csv("https://lehd.ces.census.gov/data/lodes/LODES7/md/od/md_od_main_JT03_2019.csv.gz")
into.md <-read_csv("https://lehd.ces.census.gov/data/lodes/LODES7/md/od/md_od_aux_JT03_2019.csv.gz")

#Filter for inside of MSA
in.pa <- in.pa %>%
  mutate("work_tract" = substring(as.character(w_geocode), 0, 11),
         "home_tract" = substring(as.character(h_geocode), 0,11)) %>%
  filter(home_tract %in% centroid.geoids & work_tract %in% centroid.geoids) %>%
  select(!c(h_geocode, w_geocode)) %>%
  group_by(home_tract, work_tract) %>%
  summarize_if(is.numeric, sum)

into.pa <- into.pa %>%
  mutate("work_tract" = substring(as.character(w_geocode), 0, 11),
         "home_tract" = substring(as.character(h_geocode), 0,11)) %>%
  filter(home_tract %in% centroid.geoids & work_tract %in% centroid.geoids) %>%
  select(!c(h_geocode, w_geocode)) %>%
  group_by(home_tract, work_tract) %>%
  summarize_if(is.numeric, sum)

in.nj <- in.nj %>%
  mutate("work_tract" = substring(as.character(w_geocode), 0, 11),
         "home_tract" = substring(as.character(h_geocode), 0,11)) %>%
  filter(home_tract %in% centroid.geoids & work_tract %in% centroid.geoids) %>%
  select(!c(h_geocode, w_geocode)) %>%
  group_by(home_tract, work_tract) %>%
  summarize_if(is.numeric, sum)

into.nj <- into.nj %>%
  mutate("work_tract" = substring(as.character(w_geocode), 0, 11),
         "home_tract" = substring(as.character(h_geocode), 0,11)) %>%
  filter(home_tract %in% centroid.geoids & work_tract %in% centroid.geoids) %>%
  select(!c(h_geocode, w_geocode)) %>%
  group_by(home_tract, work_tract) %>%
  summarize_if(is.numeric, sum)

in.de <- into.de %>%
  mutate("work_tract" = substring(as.character(w_geocode), 0, 11),
         "home_tract" = substring(as.character(h_geocode), 0,11)) %>%
  filter(home_tract %in% centroid.geoids & work_tract %in% centroid.geoids) %>%
  select(!c(h_geocode, w_geocode)) %>%
  group_by(home_tract, work_tract) %>%
  summarize_if(is.numeric, sum)

into.de <- into.de %>%
  mutate("work_tract" = substring(as.character(w_geocode), 0, 11),
         "home_tract" = substring(as.character(h_geocode), 0,11)) %>%
  filter(home_tract %in% centroid.geoids & work_tract %in% centroid.geoids) %>%
  select(!c(h_geocode, w_geocode)) %>%
  group_by(home_tract, work_tract) %>%
  summarize_if(is.numeric, sum)

in.md <- in.md %>%
  mutate("work_tract" = substring(as.character(w_geocode), 0, 11),
         "home_tract" = substring(as.character(h_geocode), 0,11)) %>%
  filter(home_tract %in% centroid.geoids & work_tract %in% centroid.geoids) %>%
  select(!c(h_geocode, w_geocode)) %>%
  group_by(home_tract, work_tract) %>%
  summarize_if(is.numeric, sum)

into.md <- into.md %>%
  mutate("work_tract" = substring(as.character(w_geocode), 0, 11),
         "home_tract" = substring(as.character(h_geocode), 0,11)) %>%
  filter(home_tract %in% centroid.geoids & work_tract %in% centroid.geoids) %>%
  select(!c(h_geocode, w_geocode)) %>%
  group_by(home_tract, work_tract) %>%
  summarize_if(is.numeric, sum)




master <- rbind(in.pa, into.pa, in.nj, into.nj, in.md, into.md, in.de, into.de)

Now we have to join the geographies for both home and work tracts. Generate “od” flag and “lineid” that will be used when we create a polyline. Combine these together for a long form data frame that contains geographies for both home and destination centroid of every OD combination in the MSA.

master.tracts.home <- master %>%
  left_join(centroids, by = c("home_tract" = "GEOID")) %>%
  mutate("lat" = INTPTLAT,
         "long" = INTPTLON) %>%
  select(c(1:12, 24:27)) %>%
  mutate(od = "home",
         lineid = (paste0(home_tract, work_tract)))%>%
  na.omit()


master.tracts.work <- master %>%
  left_join(centroids, by = c("work_tract" = "GEOID")) %>%
  mutate("lat" = INTPTLAT,
         "long" = INTPTLON) %>%
  select(c(1:12, 24:27)) %>%
  mutate(od = "work",
         lineid = (paste0(home_tract, work_tract))) %>%
  na.omit()


outlonger <- rbind(master.tracts.home, master.tracts.work)

Now we can create the polylines. First, we can filter for total workers above a certain amount to make data more visible. Also we filter out combinations where the home tract is the same was the work tract, since there is no commute distance here. We then group by the line ID to create a multipoint feature, and create a new spatial feature using the lat long fields. When we group by lineid we lose the worker counts, so can pull the original longerdata frame, select worker counts and rejoin this. All that is left to do is cast the object to a LINESTRING.

outlonger.filter <- outlonger %>%
  filter(S000 > 70 & home_tract != work_tract)

multipoint.longer <- outlonger.filter %>%
  st_as_sf() %>%
  group_by(lineid) %>%
  summarize(do_union = F)

multipoint.longer <- multipoint.longer %>% st_as_sf(coords = c('lat', 'long')) %>% st_set_crs(4326)


work.counts <- outlonger %>%
  ungroup() %>%
  select(c(18),c(3:12)) %>%
  group_by(lineid) %>%
  summarize_if(is.numeric,sum)

multipoint <- multipoint.longer %>%
  left_join(work.counts, by = "lineid")

commute.lines <- multipoint %>% st_cast(to = 'LINESTRING') 

Now we can map:

library(tmap)
tmap_mode("view") 
tm_shape(tracts)+
  tm_polygons() +
tm_shape(commute.lines) +
   tm_lines()